#
# Copyright 2024 WebAssembly Community Group participants
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

'''
Wasm extractor for testcases generated by the ClusterFuzz run.py script. This is
general enough to also handle Fuzzilli output.

Usage:

extract_wasms.py INFILE.js OUTFILE

That will find embedded wasm files in INFILE.js, of the form

  new Uint8Array([..wasm_contents..]);

and extract them into OUTFILE.0.wasm, OUTFILE.1.wasm, etc. It also emits
OUTFILE.js which will no longer contain the embedded contents, after which the
script can be run as

  d8 OUTFILE.js -- OUTFILE.0.wasm

That is, the embedded file can now be provided as a filename argument.
'''

import re
import sys

# Number of wasm filenames handed out so far; this is also the index that the
# next filename will use.
file_counter = 0


def get_wasm_filename():
    '''Return the next wasm output filename and advance the counter.

    Names have the form OUTFILE.N.wasm, where OUTFILE is the module-level
    `out` prefix and N counts up from 0, one step per call.
    '''
    global file_counter
    index = file_counter
    file_counter = index + 1
    return f'{out}.{index}.wasm'


# Command line: extract_wasms.py INFILE.js OUTFILE
#
# Exit with a usage message, rather than an IndexError traceback, when either
# argument is missing. Extra arguments are ignored, as before.
if len(sys.argv) < 3:
    sys.exit('Usage: extract_wasms.py INFILE.js OUTFILE')

# Path of the JavaScript testcase to scan for embedded wasm.
in_js = sys.argv[1]

# Prefix of every output file (OUTFILE.N.wasm and OUTFILE.js).
out = sys.argv[2]

# Read the entire testcase into memory; the substitution below operates on
# the full text at once.
with open(in_js) as f:
    js = f.read()


def repl(match):
    '''Substitution callback for a single `new Uint8Array([...])` match.

    `match` is a regex match whose whole text is the Uint8Array expression and
    whose first group is the comma-separated contents of the array literal.

    If the contents parse as byte values that start with the wasm magic
    "\\0asm", the bytes are written to the next file named by
    get_wasm_filename() and a placeholder expression is returned to take the
    array's place. Otherwise the matched text is returned unchanged. (We are
    careful here because Fuzzilli output can contain normal JavaScript Typed
    Arrays, which we do not want to touch.)
    '''
    original = match.group(0)
    items = match.group(1).split(',')

    try:
        # Base 0 makes int() accept both decimal and 0x-prefixed hex. bytes()
        # raises ValueError as well if any value does not fit in a byte.
        contents = bytes([int(item, 0) for item in items])
    except ValueError:
        # Not a plain list of byte values, so not wasm: leave it alone.
        return original

    if not contents.startswith(b'\0asm'):
        # Valid bytes, but without the wasm magic number: leave it alone.
        return original

    # It is wasm: emit the bytes as a binary file of their own.
    with open(get_wasm_filename(), 'wb') as wasm_file:
        wasm_file.write(contents)

    # The array expression becomes undefined plus an explanatory comment.
    return 'undefined /* extracted wasm */'


# Look at every new Uint8Array constructed from an array of values such as
# [100, 200] or [0x61, 0x6D, 0x6a]. Each candidate is passed to repl(), whose
# return value takes the candidate's place in the JS text.
_uint8_array_re = re.compile(r'new Uint8Array\(\[([\d,x a-fA-F]+)\]\)')
js = _uint8_array_re.sub(repl, js)

# Write the resulting JS to OUTFILE.js.
with open(f'{out}.js', 'w') as out_js:
    out_js.write(js)
